library(tidyverse)
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages -----------------------------------------------------------------
filter(): dplyr, stats
lag(): dplyr, stats
library(vcfR)
***** *** vcfR *** *****
This is vcfR 1.5.0
browseVignettes('vcfR') # Documentation
citation('vcfR') # Citation
***** ***** ***** *****
format.names <- str_split(data$FORMAT[[1]],pattern=":")[[1]]
data2 <- data %>%
select(-INFO,-ID,-FILTER, -FORMAT) %>%
mutate(S95=str_replace_all(S95,"([^0-9]|$)\\.","\\1NA")) %>% # convert missing data "." into NA
mutate(S96=str_replace_all(S96,"([^0-9]|$)\\.","\\1NA")) %>%
separate(S95,sep=":",into = str_c("S95_",format.names), convert = TRUE) %>%
separate(S96,sep=":",into=str_c("S96_",format.names), convert = TRUE) %>%
filter(!(S95_GT=="1/1" & S96_GT=="1/1")) %>%
filter(!(S95_GT=="0/0" & S96_GT=="0/0")) %>%
select(-ends_with("DPR"))
data2
calculate allele frequencies and polarize based on S95 for each SNP
data2 <- data2 %>%
mutate(S95_alt_AF=S95_AO/S95_DP,
S96_alt_AF=S96_AO/S96_DP,
S95_alt_major=S95_alt_AF > 0.5)
data2 <- data2 %>%
mutate(S95_AF_polar=ifelse(S95_alt_major,S95_alt_AF,1-S95_alt_AF),
S96_AF_polar=ifelse(S95_alt_major,S96_alt_AF,1-S96_alt_AF))
data2
rolling mean
plot!
plA.data <- pl.data2 %>% filter(str_detect(CHROM,"chrA"),!str_detect(CHROM,"random"))
plA <- ggplot(plA.data,aes(x=POS,y=AF_100,color=population,shape=population)) +
facet_wrap(~CHROM, scales = "free_x",ncol=2) +
#geom_point(alpha=.1) +
geom_line()
plA

plot!
plC.data <- pl.data2 %>% filter(str_detect(CHROM,"chrC"),!str_detect(CHROM,"random"))
plC <- ggplot(plC.data,aes(x=POS,y=AF_100,color=population,shape=population)) +
facet_wrap(~CHROM, scales = "free_x",ncol=2) +
#geom_point(alpha=.1) +
geom_line()
plC

plot one per chromosome
chrom.names <- unique(pl.data2$CHROM) %>% str_subset("[0-9]$")
chrom.names
[1] "chrA01" "chrA02" "chrA03" "chrA04" "chrA05" "chrA06" "chrA07" "chrA08" "chrA09" "chrA10" "chrC01" "chrC02" "chrC03" "chrC04"
[15] "chrC05" "chrC06" "chrC07" "chrC08" "chrC09"
for (chr in chrom.names) {
pl.data2 %>% filter(str_detect(CHROM,chr)) %>%
ggplot(aes(x=POS,y=AF_100,color=population)) +
geom_line() +
ggtitle(chr) +
ylim(0,1)
ggsave(str_c(chr,".png"),height = 8, width = 12)
}
absolute difference in allele frequency
data3 <- data2 %>%
group_by(CHROM) %>%
mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
abs_delta_af_100=roll_mean(abs_delta_af,n=100,fill=NaN),
abs_delta_af_500=roll_mean(abs_delta_af,n=500,fill=NaN)
)
data3
plot one per chromosome
for (chr in chrom.names) {
data3 %>% filter(str_detect(CHROM,chr)) %>%
ggplot(aes(x=POS,y=abs_delta_af_100)) +
geom_line() +
ggtitle(chr) +
ylim(0,1)
ggsave(str_c("Delta_af",chr,".png"),height = 8, width = 12)
}

Try filtering for F2 good snps
F2snps <- read_tsv("Final_F2_SNP_Sites.tab")
Parsed with column specification:
cols(
CHROM = col_character(),
POS = col_integer()
)
data2.F2.filtered <- semi_join(data2,F2snps)
Joining, by = c("CHROM", "POS")
dim(data2)
[1] 1926088 26
dim(F2snps)
[1] 18226 2
dim(data2.F2.filtered)
[1] 1997 26
data3.F2 <- data2.F2.filtered %>%
group_by(CHROM) %>%
mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
abs_delta_af_10=roll_mean(abs_delta_af,n=10,fill=NaN)
)
data3.F2
for (chr in chrom.names) {
data3.F2 %>% filter(str_detect(CHROM,chr)) %>%
ggplot(aes(x=POS,y=abs_delta_af_10)) +
geom_line() +
ggtitle(chr) +
ylim(0,1)
ggsave(str_c("Delta_af_F2_filter",chr,".png"),height = 8, width = 12)
}
filter on 505
snps.505 <- read_csv("../kiat/Breeding_Prediction_App/Data/505/505_Genotype_170K_SNPs.csv.gz") %>%
select(CHROM,POS)
Duplicated column names deduplicated: 'ID_505_K200' => 'ID_505_K200_1' [21]Parsed with column specification:
cols(
.default = col_integer(),
MARKER = col_character(),
CHROM = col_character(),
REF = col_character(),
ALT = col_character()
)
See spec(...) for full column specifications.
head(snps.505)
data2.505.filtered <- semi_join(data2,snps.505)
Joining, by = c("CHROM", "POS")
dim(data2)
[1] 1926088 26
dim(snps.505)
[1] 174397 2
dim(data2.505.filtered)
[1] 14116 26
data3.505 <- data2.505.filtered %>%
group_by(CHROM) %>%
mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
abs_delta_af_10=roll_mean(abs_delta_af,n=10,fill=NaN)
)
data3.505
for (chr in chrom.names) {
data3.505 %>% filter(str_detect(CHROM,chr)) %>%
ggplot(aes(x=POS,y=abs_delta_af_10)) +
geom_point(aes(y=abs_delta_af),shape=1) +
geom_line(color="blue") +
ggtitle(chr) +
ylim(0,1)
ggsave(str_c("Delta_af_505_filter",chr,".png"),height = 8, width = 12)
}
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHZjZlIpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShSY3BwUm9sbCkKYGBgCgpgYGB7cn0KY25hbWVzIDwtIHN0cl9zcGxpdCgiQ0hST00gIFBPUyAgICAgSUQgICAgICBSRUYgICAgIEFMVCAgICAgUVVBTCAgICBGSUxURVIgIElORk8gICAgRk9STUFUICBTOTUgICAgIFM5NiIscGF0dGVybj1ib3VuZGFyeSgid29yZCIpKVtbMV1dCgoKZGF0YSA8LSByZWFkX3RzdigiUzk1Uzk2X2ZpbHRlcmVkLnZjZi5yZWNvZGUudmNmIixjb21tZW50PSIjIixjb2xfbmFtZXMgPSBjbmFtZXMpCmRhdGEKYGBgCgpgYGB7cn0KZm9ybWF0Lm5hbWVzIDwtIHN0cl9zcGxpdChkYXRhJEZPUk1BVFtbMV1dLHBhdHRlcm49IjoiKVtbMV1dCgpkYXRhMiA8LSBkYXRhICU+JQogIHNlbGVjdCgtSU5GTywtSUQsLUZJTFRFUiwgLUZPUk1BVCkgJT4lCiAgbXV0YXRlKFM5NT1zdHJfcmVwbGFjZV9hbGwoUzk1LCIoW14wLTldfCQpXFwuIiwiXFwxTkEiKSkgJT4lICMgY29udmVydCBtaXNzaW5nIGRhdGEgIi4iIGludG8gTkEKICBtdXRhdGUoUzk2PXN0cl9yZXBsYWNlX2FsbChTOTYsIihbXjAtOV18JClcXC4iLCJcXDFOQSIpKSAlPiUKICBzZXBhcmF0ZShTOTUsc2VwPSI6IixpbnRvID0gc3RyX2MoIlM5NV8iLGZvcm1hdC5uYW1lcyksIGNvbnZlcnQgPSBUUlVFKSAlPiUKICBzZXBhcmF0ZShTOTYsc2VwPSI6IixpbnRvPXN0cl9jKCJTOTZfIixmb3JtYXQubmFtZXMpLCBjb252ZXJ0ID0gVFJVRSkgJT4lCiAgZmlsdGVyKCEoUzk1X0dUPT0iMS8xIiAmIFM5Nl9HVD09IjEvMSIpKSAlPiUKICBmaWx0ZXIoIShTOTVfR1Q9PSIwLzAiICYgUzk2X0dUPT0iMC8wIikpICU+JQogIHNlbGVjdCgtZW5kc193aXRoKCJEUFIiKSkKCmRhdGEyCmBgYAoKCgpjYWxjdWxhdGUgYWxsZWxlIGZyZXF1ZW5jaWVzIGFuZCBwb2xhcml6ZSBiYXNlZCBvbiBTOTUKZm9yIGVhY2ggU05QIApgYGB7cn0KZGF0YTIgPC0gZGF0YTIgJT4lCiAgbXV0YXRlKFM5NV9hbHRfQUY9Uzk1X0FPL1M5NV9EUCwKICAgICAgICAgUzk2X2FsdF9BRj1TOTZfQU8vUzk2X0RQLAogICAgICAgICBTOTVfYWx0X21ham9yPVM5NV9hbHRfQUYgPiAwLjUpCmRhdGEyIDwtIGRhdGEyICU+JQogIG11dGF0ZShTOTVfQUZfcG9sYXI9aWZlbHNlKFM5NV9hbHRfbWFqb3IsUzk1X2FsdF9BRiwxLVM5NV9hbHRfQUYpLAogICAgICAgICBTOTZfQUZfcG9sYXI9aWZlbHNlKFM5NV9hbHRfbWFqb3IsUzk2X2FsdF9BRiwxLVM5Nl9hbHRfQUYpKQpkYXRhMiAgICAgICAgIApgYGAKCnJvbGxpbmcgbWVhbgpgYGB7cn0KcGwuZGF0YTIgPC0gZGF0YTIgJT4lCiAgZ2F0aGVyKGtleT0icG9wdWxhdGlvbiIsdmFsdWU9ImFsbGVsZS5mcmVxdWVuY3kiLFM5NV9BRl9wb2xhcixTOTZfQUZfcG9sYXIpICU+JQogIGFycmFuZ2UoQ0hST00sUE9TKSAlPiUKICBncm91cF9ieShwb3B1bGF0aW9uLENIUk9NKSAlPiUKICBtdXRhdGUoQUZfMTAwPXJvbGxfbWVhbihhbGxlbGUuZnJlcXVlbmN5LG49MTAwLGZpbGw9TmFOKSkKcGwuZGF0YTIKYGBgCgoKcGxvdCEKYGBge3J9CnBsQS5kYXRhIDwtIHBsLmRhdGEyICU+JSBmaWx0ZXIoc3RyX2RldGVjdChDSFJPTSwiY2hyQSIpLCFzdHJfZGV0ZWN0KENIUk9NLCJyYW5kb20iKSkgCmBgYAoKYGBge3J9CnBsQSA8LSBnZ3Bsb3QocGxBLmRhdGEsYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24sc2hhcGU9cG9wdWxhdGlvbikpICsKICBmYWNldF93cmFwKH5DSFJPTSwgc2NhbGVzID0gImZyZWVfeCIsbmNvbD0yKSArCiAgI2dlb21fcG9pbnQoYWxwaGE9LjEpICsKICBnZW9tX2xpbmUoKQpwbEEKYGBgCgoKcGxvdCEKYGBge3J9CnBsQy5kYXRhIDwtIHBsLmRhdGEyICU+JSBmaWx0ZXIoc3RyX2RldGVjdChDSFJPTSwiY2hyQyIpLCFzdHJfZGV0ZWN0KENIUk9NLCJyYW5kb20iKSkgCmBgYAoKYGBge3J9CnBsQyA8LSBnZ3Bsb3QocGxDLmRhdGEsYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24sc2hhcGU9cG9wdWxhdGlvbikpICsKICBmYWNldF93cmFwKH5DSFJPTSwgc2NhbGVzID0gImZyZWVfeCIsbmNvbD0yKSArCiAgI2dlb21fcG9pbnQoYWxwaGE9LjEpICsKICBnZW9tX2xpbmUoKQpwbEMKYGBgCgpwbG90IG9uZSBwZXIgY2hyb21vc29tZQpgYGB7cn0KY2hyb20ubmFtZXMgPC0gdW5pcXVlKHBsLmRhdGEyJENIUk9NKSAlPiUgc3RyX3N1YnNldCgiWzAtOV0kIikKY2hyb20ubmFtZXMKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBwbC5kYXRhMiAlPiUgZmlsdGVyKHN0cl9kZXRlY3QoQ0hST00sY2hyKSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24pKSArCiAgICBnZW9tX2xpbmUoKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKGNociwiLnBuZyIpLGhlaWdodCA9IDgsIHdpZHRoID0gMTIpCn0KYGBgCgoKIyMgYWJzb2x1dGUgZGlmZmVyZW5jZSBpbiBhbGxlbGUgZnJlcXVlbmN5CgpgYGB7cn0KZGF0YTMgPC0gZGF0YTIgJT4lCiAgZ3JvdXBfYnkoQ0hST00pICU+JQogIG11dGF0ZShhYnNfZGVsdGFfYWYgPSBhYnMoUzk1X0FPL1M5NV9EUCAtIFM5Nl9BTy9TOTZfRFApLAogICAgICAgICBhYnNfZGVsdGFfYWZfMTAwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj0xMDAsZmlsbD1OYU4pLAogICAgICAgICBhYnNfZGVsdGFfYWZfNTAwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj01MDAsZmlsbD1OYU4pCiAgICAgICAgICkKZGF0YTMKYGBgCgpwbG90IG9uZSBwZXIgY2hyb21vc29tZQoKYGBge3J9CmZvciAoY2hyIGluIGNocm9tLm5hbWVzKSB7CiAgZGF0YTMgJT4lIGZpbHRlcihzdHJfZGV0ZWN0KENIUk9NLGNocikpICU+JQogICAgZ2dwbG90KGFlcyh4PVBPUyx5PWFic19kZWx0YV9hZl8xMDApKSArCiAgICBnZW9tX2xpbmUoKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKCJEZWx0YV9hZiIsY2hyLCIucG5nIiksaGVpZ2h0ID0gOCwgd2lkdGggPSAxMikKfQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGF0YTMsIGFlcyh4PVM5NV9EUCkpICsgZ2VvbV9oaXN0b2dyYW0oKQpnZ3Bsb3QoZGF0YTMsIGFlcyh4PVM5Nl9EUCkpICsgZ2VvbV9oaXN0b2dyYW0oKQoKYGBgCgoKIyMgVHJ5IGZpbHRlcmluZyBmb3IgRjIgZ29vZCBzbnBzCgpgYGB7cn0KRjJzbnBzIDwtIHJlYWRfdHN2KCJGaW5hbF9GMl9TTlBfU2l0ZXMudGFiIikKYGBgCgpgYGB7cn0KZGF0YTIuRjIuZmlsdGVyZWQgPC0gc2VtaV9qb2luKGRhdGEyLEYyc25wcykKCmRpbShkYXRhMikKZGltKEYyc25wcykKZGltKGRhdGEyLkYyLmZpbHRlcmVkKQpgYGAKCmBgYHtyfQpkYXRhMy5GMiA8LSBkYXRhMi5GMi5maWx0ZXJlZCAlPiUKICBncm91cF9ieShDSFJPTSkgJT4lCiAgbXV0YXRlKGFic19kZWx0YV9hZiA9IGFicyhTOTVfQU8vUzk1X0RQIC0gUzk2X0FPL1M5Nl9EUCksCiAgICAgICAgIGFic19kZWx0YV9hZl8xMD1yb2xsX21lYW4oYWJzX2RlbHRhX2FmLG49MTAsZmlsbD1OYU4pCiAgICAgICAgICkKZGF0YTMuRjIKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBkYXRhMy5GMiAlPiUgZmlsdGVyKHN0cl9kZXRlY3QoQ0hST00sY2hyKSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHg9UE9TLHk9YWJzX2RlbHRhX2FmXzEwKSkgKwogICAgZ2VvbV9saW5lKCkgKwogICAgZ2d0aXRsZShjaHIpICsKICAgIHlsaW0oMCwxKQogIGdnc2F2ZShzdHJfYygiRGVsdGFfYWZfRjJfZmlsdGVyIixjaHIsIi5wbmciKSxoZWlnaHQgPSA4LCB3aWR0aCA9IDEyKQp9CmBgYAoKIyMgZmlsdGVyIG9uIDUwNQoKYGBge3J9CnNucHMuNTA1IDwtIHJlYWRfY3N2KCIuLi9raWF0L0JyZWVkaW5nX1ByZWRpY3Rpb25fQXBwL0RhdGEvNTA1LzUwNV9HZW5vdHlwZV8xNzBLX1NOUHMuY3N2Lmd6IikgJT4lCiAgc2VsZWN0KENIUk9NLFBPUykKaGVhZChzbnBzLjUwNSkKYGBgCgpgYGB7cn0KZGF0YTIuNTA1LmZpbHRlcmVkIDwtIHNlbWlfam9pbihkYXRhMixzbnBzLjUwNSkKCmRpbShkYXRhMikKZGltKHNucHMuNTA1KQpkaW0oZGF0YTIuNTA1LmZpbHRlcmVkKQpgYGAKCmBgYHtyfQpkYXRhMy41MDUgPC0gZGF0YTIuNTA1LmZpbHRlcmVkICU+JQogIGdyb3VwX2J5KENIUk9NKSAlPiUKICBtdXRhdGUoYWJzX2RlbHRhX2FmID0gYWJzKFM5NV9BTy9TOTVfRFAgLSBTOTZfQU8vUzk2X0RQKSwKICAgICAgICAgYWJzX2RlbHRhX2FmXzEwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj0xMCxmaWxsPU5hTikKICAgICAgICAgKQpkYXRhMy41MDUKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBkYXRhMy41MDUgJT4lIGZpbHRlcihzdHJfZGV0ZWN0KENIUk9NLGNocikpICU+JQogICAgZ2dwbG90KGFlcyh4PVBPUyx5PWFic19kZWx0YV9hZl8xMCkpICsKICAgIGdlb21fcG9pbnQoYWVzKHk9YWJzX2RlbHRhX2FmKSxzaGFwZT0xKSArCiAgICBnZW9tX2xpbmUoY29sb3I9ImJsdWUiKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKCJEZWx0YV9hZl81MDVfZmlsdGVyIixjaHIsIi5wbmciKSxoZWlnaHQgPSA4LCB3aWR0aCA9IDEyKQp9CmBgYA==